Import Packages
library(tidyverse)
library(mapview)
library(sf)
library(tigris)
library(plotly)
# Have tigris hand back sf objects rather than its default class.
options(tigris_class = "sf")
Set the root directory to the local folder that contains the NYT data
# Local clone of the NYT covid-19-data repository. The CSV files further down
# are read with relative paths, so both knitr and the interactive session are
# pointed at this folder. The path is defined once so the two cannot drift.
nyt_data_dir <- "~/Documents/GitHub/nytimes-covid-19-data" # change this path if necessary.
if (!dir.exists(nyt_data_dir)) {
  # Fail early with the offending path instead of a bare setwd() error.
  stop("NYT data directory not found: ", nyt_data_dir, call. = FALSE)
}
# Used when knitting. knitr is referenced through its namespace because no
# library(knitr) call is visible in this file.
knitr::opts_knit$set(root.dir = nyt_data_dir)
# Again, when running in RStudio, use setwd() instead.
setwd(nyt_data_dir)
# State polygons from the tigris package (Google "tigris r" to see its full
# functionality). The tigris_class option set earlier makes tigris return the
# polygons as sf objects.
# cb = FALSE is spelled out because the shorthand F is an ordinary variable
# that can be reassigned.
# tigris by default is in a different coordinate system, so we transform to
# EPSG:4326 to maintain consistency with our points.
states <-
  states(cb = FALSE, progress_bar = FALSE) %>%
  st_transform(4326)
# California county polygons from tigris, reprojected to EPSG:4326.
# cb = FALSE is spelled out because the shorthand F is an ordinary variable
# that can be reassigned.
ca_counties <-
  counties("CA", cb = FALSE, progress_bar = FALSE) %>%
  st_transform(4326)
# Names of the nine Bay Area counties, used below to subset the county data
# and the county polygons.
bay_counties <- c(
  "Alameda", "Contra Costa", "Marin",
  "Napa", "San Francisco", "San Mateo",
  "Santa Clara", "Solano", "Sonoma"
)
# Read the state-level CSV and keep it as a plain data.frame.
covid_nyt_state <- as.data.frame(read_csv("us-states.csv"))
# Show the most recent date present in the state file.
latest_state_msg <- paste0("Latest date: ", max(covid_nyt_state$date))
print(latest_state_msg)
## [1] "Latest date: 2020-05-20"
Add a death rate column
# Deaths divided by cases, row by row, stored as a new column.
covid_nyt_state[["deathrate"]] <- with(covid_nyt_state, deaths / cases)
Create an sf data frame of the state-level data, filtered to the latest date
# Latest-date state rows joined onto the state polygons.
# right_join keeps every polygon, so states without a matching row carry NAs.
covid_nyt_by_state <-
  covid_nyt_state %>%
  filter(date == max(date)) %>%
  right_join(states, by = c(state = "NAME")) %>%
  st_as_sf() %>%
  select(date:deathrate)
# mapview(covid_nyt_by_state, zcol = "cases")
# mapview(covid_nyt_by_state, zcol = "deaths")
# mapview(covid_nyt_by_state, zcol = "deathrate")
# Read the county-level CSV (left as the tibble that read_csv returns).
covid_nyt_county <- read_csv("us-counties.csv")
# Show the most recent date present in the county file.
latest_county_msg <- paste0("Latest date: ", max(covid_nyt_county$date))
print(latest_county_msg)
## [1] "Latest date: 2020-05-20"
Add a death rate column
# Deaths divided by cases, row by row, stored as a new column.
covid_nyt_county[["deathrate"]] <- with(covid_nyt_county, deaths / cases)
Filter to CA Counties
# Full time series for every California county.
covid_nyt_ca_county <-
  covid_nyt_county %>%
  filter(state == "California")
Create an sf data frame of the California county data, filtered to the latest date
# Latest-date California county rows joined onto the county polygons.
# The latest date comes from the county file itself. The state and county CSVs
# are separate files and need not end on the same day; filtering on the state
# file's last date could match no rows and leave every county with NA values
# after the join.
# right_join keeps every polygon in ca_counties, matched or not.
covid_nyt_ca_by_county <-
  covid_nyt_county %>%
  filter(
    state == "California",
    date == max(covid_nyt_county$date)
  ) %>%
  right_join(ca_counties, by = c("county" = "NAME")) %>%
  st_as_sf() %>%
  select(date:deathrate)
# mapview(covid_nyt_ca_by_county, zcol = "cases")
# mapview(covid_nyt_ca_by_county, zcol = "deaths")
# mapview(covid_nyt_ca_by_county, zcol = "deathrate")
# Full time series for the Bay Area counties only.
covid_nyt_bay_area <-
  covid_nyt_county %>%
  filter(state == "California", county %in% bay_counties)
Create an sf data frame of the Bay Area county data, filtered to the latest date
# Latest-date Bay Area county rows joined onto the matching county polygons.
# The latest date comes from the county file itself. The state and county CSVs
# are separate files and need not end on the same day; filtering on the state
# file's last date could match no rows and leave every county with NA values
# after the join.
covid_nyt_bay_by_county <-
  covid_nyt_county %>%
  filter(
    state == "California",
    county %in% bay_counties,
    date == max(covid_nyt_county$date)
  ) %>%
  right_join(
    ca_counties %>% filter(NAME %in% bay_counties),
    by = c("county" = "NAME")
  ) %>%
  st_as_sf() %>%
  select(date:deathrate)
# Interactive maps of the Bay Area sf object, one per attribute passed as zcol
# (cases, then deaths). The deathrate map is left disabled.
mapview(covid_nyt_bay_by_county, zcol = "cases")
mapview(covid_nyt_bay_by_county, zcol = "deaths")
# mapview(covid_nyt_bay_by_county, zcol = "deathrate")
First read in US data
# Read the national CSV and keep it as a plain data.frame.
covid_nyt_us <- as.data.frame(read_csv("us.csv"))
Simple ggplot of cases and deaths
# Static scatter plot of national cumulative cases (blue) and deaths (orange)
# against date.
ggplot(covid_nyt_us) +
  geom_point(aes(x = date, y = cases), color = "blue") +
  geom_point(aes(x = date, y = deaths), color = "orange") +
  labs(x = "Date", y = "Cumulative Counts")
Pass the ggplot to plotly to get an interactive graph
# Same national cases (blue) and deaths (orange) plot, stored so it can be
# converted to an interactive plotly widget.
cumulative_cases_timeseries <-
  ggplot(covid_nyt_us) +
  geom_point(aes(x = date, y = cases), color = "blue") +
  geom_point(aes(x = date, y = deaths), color = "orange") +
  labs(x = "Date", y = "Cumulative Counts")
ggplotly(cumulative_cases_timeseries)
# Interactive cumulative case counts over time, one colored series per state.
cumulative_cases_timeseries <-
  ggplot(
    data = covid_nyt_state,
    mapping = aes(x = date, y = cases, color = state)
  ) +
  geom_line() +
  geom_point() +
  labs(x = "Date", y = "Cumulative Count of Cases", color = "State")
ggplotly(cumulative_cases_timeseries)
# Interactive cumulative death counts over time, one colored series per state.
cumulative_deaths_timeseries <-
  ggplot(
    data = covid_nyt_state,
    mapping = aes(x = date, y = deaths, color = state)
  ) +
  geom_line() +
  geom_point() +
  labs(x = "Date", y = "Cumulative Count of Deaths", color = "State")
ggplotly(cumulative_deaths_timeseries)
# Interactive cumulative case counts over time, one colored series per
# California county.
cumulative_cases_timeseries <-
  ggplot(
    data = covid_nyt_ca_county,
    mapping = aes(x = date, y = cases, color = county)
  ) +
  geom_line() +
  geom_point() +
  labs(x = "Date", y = "Cumulative Count of Cases", color = "County")
ggplotly(cumulative_cases_timeseries)
# Interactive cumulative death counts over time, one colored series per
# California county.
cumulative_deaths_timeseries <-
  ggplot(
    data = covid_nyt_ca_county,
    mapping = aes(x = date, y = deaths, color = county)
  ) +
  geom_line() +
  geom_point() +
  labs(x = "Date", y = "Cumulative Count of Deaths", color = "County")
ggplotly(cumulative_deaths_timeseries)
# Interactive cumulative case counts over time, one colored series per
# Bay Area county.
cumulative_cases_timeseries <-
  ggplot(
    data = covid_nyt_bay_area,
    mapping = aes(x = date, y = cases, color = county)
  ) +
  geom_line() +
  geom_point() +
  labs(x = "Date", y = "Cumulative Count of Cases", color = "County")
ggplotly(cumulative_cases_timeseries)
# Interactive cumulative death counts over time, one colored series per
# Bay Area county.
cumulative_deaths_timeseries <-
  ggplot(
    data = covid_nyt_bay_area,
    mapping = aes(x = date, y = deaths, color = county)
  ) +
  geom_line() +
  geom_point() +
  labs(x = "Date", y = "Cumulative Count of Deaths", color = "County")
ggplotly(cumulative_deaths_timeseries)